# PLOS Data ----
# Link Scopus/WOS data by DOI to PLOS data.

PLOS <- read.csv("PLOS_Dataset.csv")

# Name the third column "DI" so it can be compared against the DI column of
# the Scopus/WoS tables below.
colnames(PLOS)[3] <- "DI"

# PLOS rows whose DI value also occurs in scopus_wos_all_oa.
just_northumbria_PLOS <- PLOS %>%
  filter(DI %in% scopus_wos_all_oa$DI) # Only 15 (with a Northumbria Author)

# PLOS rows whose DI value also occurs in all_oa_corr.
corr_northumbria_PLOS <- PLOS %>%
  filter(DI %in% all_oa_corr$DI) # Only 9 (with a corresponding author from Northumbria.)

# Get descriptives for open access ----
# Alternative route: keep the rows whose JI field contains "PLOS", taken
# directly from the Scopus/WoS tables instead of the PLOS export.
just_plos_attempt <- scopus_wos_all_oa[
  str_detect(scopus_wos_all_oa$JI, "PLOS"),
] # 43 journals.
just_ploscorr_attempt <- all_oa_corr[
  str_detect(all_oa_corr$JI, "PLOS"),
] # 27 journals
# Discrepancy between the PLOS data and Scopus/Web of Science. Could be due to the PLOS dataset being out of date.
# OSF ----

# Staff extract; the code below uses its Name column (FTE and ORCID columns
# also appear in the join output further down).
ORCID <- read.csv("PURE_ORCID_EXTRACT.csv")

# Names taken from OSF; the first column is renamed to "Name" further down.
northumbria_OSF <- read.csv("OSF_names.csv")
# Reverse the comma-separated parts of a single name string and join them
# with single spaces, e.g. "Smith, John" -> "John Smith".
#
# string: a length-one character vector.
# Returns a length-one character vector; NA input is returned as NA.
change_order <- function(string) {
  # Keep missing names missing instead of turning them into the text "NA".
  if (length(string) == 1L && is.na(string)) {
    return(NA_character_)
  }
  # Trim each part: splitting "Smith, John" on "," leaves " John", which
  # would otherwise put a leading space at the start of the result.
  words <- trimws(strsplit(string, ",")[[1]])
  # rev() is safe for zero parts, unlike words[length(words):1].
  paste(rev(words), collapse = " ")
}
# Rewrite every name in ORCID with change_order() so the part order is
# reversed before matching. vapply() guarantees one string per input name
# and does not attach the original strings as element names.
ORCID$Name <- vapply(
  ORCID$Name, change_order, character(1),
  USE.NAMES = FALSE
)

# Give the OSF table the same key column name used for the join.
colnames(northumbria_OSF)[1] <- "Name"
# Issue with slight differences in names: there are no complete matches. Fuzzy matching is confusing when trying to merge and delete non-matching rows.
# Fuzzy-join the two name lists and keep, for each ORCID-side name, the two
# candidate matches with the smallest distance.
ORCID_OSF_join <- stringdist_join(
  ORCID, northumbria_OSF,
  by = "Name",            # match on the Name column
  mode = "left",          # use left join
  method = "jw",          # use jw distance metric
  max_dist = 99,          # far above any distance kept by the 0.25 cut-off below
  distance_col = "dist"
) %>%
  group_by(Name.x) %>%
  slice_min(order_by = dist, n = 2)

# Diagnostic only (result is printed, not stored): candidates whose first
# three characters agree on both sides.
ORCID_OSF_join %>%
  filter(substr(Name.x, 1, 3) == substr(Name.y, 1, 3))
# Recorded output:
# A tibble: 0 × 6
# Groups: Name.x [0]
# ℹ 6 variables: Name.x <chr>, Username <chr>, FTE <dbl>, ORCID <chr>, Name.y <chr>, dist <dbl>

# Keep only reasonably close matches.
ORCID_OSF_join <- ORCID_OSF_join[ORCID_OSF_join$dist <= .25, ]
### write_csv(ORCID_OSF_join, "ORCID_OSF_join.csv")
# I manually went through the fuzzy matches and removed any that were clearly incorrect.
# Read the join file "ORCID_OSF_join.csv" and drop the columns that are not
# used further down.
ORCID_neat_join <- read.csv("ORCID_OSF_join.csv")
ORCID_neat_join$dist <- NULL
ORCID_neat_join$FTE <- NULL
ORCID_neat_join$Name.y <- NULL
# Affiliation (University of Northumbria) in Scopus, then click Authors, then export as CSV. Linking OSF users to Scopus authors, using the ORCID from the Excel extract, plus manual checking for multiple matches.
# Two Scopus author exports, one per sort direction.
A_Z <- read.csv("scopus A-Z.csv")
Z_A <- read.csv("scopus Z-A.csv")
#Scopus by affiliation for authors. A-Z 4000 in one and Z-A 4000 in the other to capture all authors.
# Combine the two exports on the Scopus author id.
# NOTE(review): merge() defaults to an inner join, so only authors present in
# BOTH exports are kept. If the goal is to capture all authors, this probably
# needs a union of the two exports instead -- confirm before changing, because
# the positional renames below depend on the merged column layout.
scopus_authors <- merge(A_Z, Z_A, by = "Auth.ID")

# Drop the duplicated columns contributed by the Z-A export.
scopus_authors <- subset(
  scopus_authors,
  select = c(
    -Author.Name.y, -Number.of.Documents.y, -Orc_ID.y, -Subject.Area.y, -X.y
  )
)

# Positional rename of the remaining columns 2 to 6.
colnames(scopus_authors)[2:6] <- c(
  "Name", "Number_of_Documents", "Subject_Area", "ORCID", "X"
)
# Treat empty strings as missing in both tables.
scopus_authors[scopus_authors == ""] <- NA
ORCID_neat_join[ORCID_neat_join == ""] <- NA

# Only rows with an ORCID can be linked.
scopus_authors <- scopus_authors[!is.na(scopus_authors$ORCID), ]
ORCID_neat_join <- ORCID_neat_join[!is.na(ORCID_neat_join$ORCID), ]

# Attach Scopus author details to the OSF/ORCID matches by ORCID.
scopus_and_OSF <- left_join(
  ORCID_neat_join, scopus_authors,
  by = "ORCID"
) ## Only 7 on OSF.ORCID and on Scopus.